rm(list = ls())
# Load required libraries
library("EBMAforecast")
library("spduration")
library("magrittr")
library("xtable")
library("countrycode")
library("lubridate")
library(MASS) 
library(pscl)
library(foreign)
library(rgdal)
library(RArcInfo)
library(stargazer)
library(mvtnorm)
library(pROC)
library(robust)
library(cvTools)
library(doBy)
library(raster)
library(survival)
library(spduration)
library(DataCombine)
library(plm)
library(AER)
library(lmtest)
library(doMC) # for using multipe processor cores 
library("nnet")
library("glmnet")		
library("PRROC")
library(dplyr)
library(scales)
library(streg)
library(ROCR)
library(separationplot)
library(ggthemes)
## load the data
load("data.RData")


################################# Table 1 #################################
###########################################################################
#define function to get N
count <- function(x){
  length(which(!is.na(x)))
}
# all
summ <- function(x){ c(mean(x,na.rm = T),
                       sd(x, na.rm = T),
                       range(x,na.rm = T),
                       count(x)) }

##first at weaker dyad 
sumdf <- data %>% 
  dplyr::ungroup() %>% 
  select(violence, excluded, nlights_max, prec_gpcp, mountains_mean, 
         neighboringcivilwar, neighboringInterStateWar, polity2, bdist2,
         capdist, diamsec_s,  
         petroleum_s, irregular_dummy, lntpop, gdppcrate, lngdppc, 
         milexpr, violence_t1, violence_t2, violence_s1)

sumdf <- sapply(sumdf, function(x) summ(x))
sumdf <- t(sumdf)
sumdf <- as.data.frame(sumdf)
rownames(sumdf) <- c("政治暴力", "被排斥族群","夜间灯光(最大值)","总降水量",
                     "山地面积","邻国内战数量","邻国处于国际冲突","政体分值", 
                     "到最近邻国距离","距首都距离", "蕴藏冲积矿钻石","蕴藏石油",
                     "非正常领导更迭","总人口(对数)","人均GDP增长率",
                     "人均GDP(对数)","军费开支", "上一年暴力","前两年暴力",
                     "邻村的暴力")
colnames(sumdf)[1:5] <- c("均值", "标准差", "最小值", "最大值", "样本数")
library(xtable)
print.xtable(xtable(sumdf, align='lccccc', 	
                    caption='变量的描述性统计',
                    label='des1'),
             include.rownames=TRUE, sanitize.text.function=identity,
             size='normalsize',
             file='descrip1.tex')


################# Figure 4 ###########################
##########################################################

cordf <-  data %>% 
  dplyr::ungroup() %>% 
  select(violence, excluded, nlights_max, prec_gpcp, mountains_mean, 
         neighboringcivilwar, neighboringInterStateWar, polity2, bdist2,
         capdist, diamsec_s,  
         petroleum_s, irregular_dummy, lntpop, gdppcrate, lngdppc, 
         milexpr, violence_t1, violence_t2, violence_s1)
colnames(cordf) <- seq(1, 20)

colnames(cordf)[1] <- "Y"
library(GGally)
cor1 <- ggcorr(cordf,geom = "circle", nbreaks = 4, 
               legend.position = "bottom", label = F,
               min_size = 0, max_size = 6) 
ggsave("figures/cor1.pdf", plot = cor1,  width = 7, height = 7.2)


## logit model with spatial-temporal lag
logit_m1 <- glm(violence ~ excluded + nlights_max + prec_gpcp + mountains_mean + 
                       neighboringcivilwar + neighboringInterStateWar + polity2 +
                       bdist2 + capdist + diamsec_s + 
                       petroleum_s + irregular_dummy + lntpop + gdppcrate + lngdppc + 
                       milexpr + violence_t1 + violence_t2 + violence_s1 +
                       factor(gwno), data = data, family = "binomial")

source("setup_code.R")
#set ggplot theme
theme_set(theme_gray())
library(extrafont)
library(showtext)
showtext_auto(enable = TRUE)
font_add('KaiTi', 'KaiTi.ttf')


###################### Figure 5(a)  ######################
##########################################################
## cluster SE by GID

coef_logit <- coef_clusterplot(ModelResults = logit_m1,
                               varnames = c('excluded', 'nlights_max','prec_gpcp',
                                            'mountains_mean','neighboringcivilwar',
                                            'neighboringInterStateWar',
                                            'polity2', 'bdist2', 'capdist','diamsec_s', 
                                            'petroleum_s', 'irregular_dummy', 'lntpop', 
                                            'gdppcrate','lngdppc', 
                                            'milexpr','violence_t1','violence_t2',
                                            'violence_s1'),
                               data = data, 
                               clusterid = "gid") + theme_bw() + 
       scale_x_discrete("", labels=c("到最近邻国距离",
                                     "距首都距离","蕴藏冲积矿钻石","被排斥族群",
                                     "人均GDP增长率", "非正常领导更迭","人均GDP(对数)",
                                     "总人口(对数)","军费开支","山地面积",
                                     "邻国处于内战","邻国处于国际冲突",
                                     "夜间灯光(最大值)", "蕴藏石油",
                                     "政体分值", "总降水量",
                                     "邻村的暴力", "上一年暴力","前两年暴力")) +
       theme(axis.text = element_text(size=14),
             text = element_text(size=11, family = 'KaiTi'),
             plot.title = element_text(hjust = 0.5,size = 30, face = "bold"),
             axis.title=element_text(size=14))
ggsave("figures/logit.pdf", plot = coef_logit,  width = 8, height = 10)



########################Figure 5 (b) model ###############
##########################################################

##simulation on nighttime light
logit_nofix <- glm(violence ~ excluded + nlights_max + prec_gpcp + mountains_mean + 
                          neighboringcivilwar + neighboringInterStateWar + polity2 +
                          bdist2 + capdist + diamsec_s + 
                          petroleum_s + irregular_dummy + lntpop + gdppcrate + lngdppc + 
                          milexpr + violence_t1 + violence_t2 + violence_s1,
                   data = data, family = "binomial")

###################### Figure 5(b): Marginal effects of explanatory variables
##########################################################
## cluster SE by GID
fd_b1 <- density_ridgeplot(ModelResults = logit_nofix, n.sim = 1000, 
                           varname = c('excluded', 'nlights_max','prec_gpcp',
                                       'mountains_mean','neighboringcivilwar',
                                       'neighboringInterStateWar',
                                       'polity2', 'bdist2', 'capdist','diamsec_s', 
                                       'petroleum_s', 'irregular_dummy', 'lntpop', 
                                       'gdppcrate','lngdppc', 
                                       'milexpr','violence_t1','violence_t2',
                                       'violence_s1'),
                           data = data, 
                           clusterid = "gid", 
                           val1 = 0, 
                           val2 = 1) + theme_bw() +
       scale_y_discrete(labels=c("到最近邻国距离",
                                 "距首都距离","蕴藏冲积矿钻石","被排斥族群",
                                 "人均GDP增长率", "非正常领导更迭","人均GDP(对数)",
                                 "总人口(对数)","军费开支","山地面积",
                                 "邻国处于内战","邻国处于国际冲突",
                                 "夜间灯光(最大值)", "蕴藏石油",
                                 "政体分值", "总降水量",
                                 "邻村的暴力", "上一年暴力","前两年暴力")) +
       labs(x = "预测概率的变化", y = "") + 
       theme(axis.text = element_text(size=14),
             text = element_text(size=11, family = 'KaiTi'),
             plot.title = element_text(hjust = 0.5),
             axis.title=element_text(size=14)) 
ggsave("figures/fd1.pdf", plot = fd_b1,  width = 8, height = 10)

############ Alternative: 
#### split the figure by levels of variable
## 
fd_A <- density_ridgeplot(ModelResults = logit_nofix, n.sim = 1000, 
                          varname = c('excluded', 'nlights_max','prec_gpcp',
                                      'mountains_mean', 'bdist2', 'capdist','diamsec_s', 
                                      'petroleum_s'),
                          data = data, 
                          clusterid = "gid", 
                          val1 = 0, 
                          val2 = 1) + theme_bw() +
       scale_y_discrete(labels=c("到最近邻国距离",
                                 "距首都距离","蕴藏冲积矿钻石","被排斥族群",
                                 "山地面积",
                                 "夜间灯光(最大值)", "蕴藏石油",
                                 "总降水量")) +
       labs(x = "预测概率的变化", y = "") + 
       theme(axis.text = element_text(size=14),
             text = element_text(size=11, family = 'KaiTi'),
             plot.title = element_text(hjust = 0.5),
             axis.title=element_text(size=14)) 
ggsave("figures/fdA.pdf", plot = fd_A,  width = 8, height = 10)


fd_B <- density_ridgeplot(ModelResults = logit_nofix, n.sim = 1000, 
                          varname = c('neighboringcivilwar',
                                      'neighboringInterStateWar',
                                      'polity2', 'irregular_dummy', 'lntpop', 
                                      'gdppcrate','lngdppc', 
                                      'milexpr'),
                          data = data, 
                          clusterid = "gid", 
                          val1 = 0, 
                          val2 = 1) + theme_bw() +
       fd_B=  fd_B+ scale_y_discrete(labels=c(
              "人均GDP增长率", "非正常领导更迭","人均GDP(对数)",
              "总人口(对数)","军费开支",
              "邻国处于内战","邻国处于国际冲突",
              "政体分值")) +
       labs(x = "预测概率的变化", y = "") + 
       theme(axis.text = element_text(size=14),
             text = element_text(size=11, family = 'KaiTi'),
             plot.title = element_text(hjust = 0.5),
             axis.title=element_text(size=14)) 
ggsave("figures/fdb.pdf", plot = fd_B,  width = 8, height = 10)

fd_C <- density_ridgeplot(ModelResults = logit_nofix, n.sim = 1000, 
                          varname = c('violence_t1','violence_t2',
                                      'violence_s1'),
                          data = data, 
                          clusterid = "gid", 
                          val1 = 0, 
                          val2 = 1) + theme_bw() +
       scale_y_discrete(labels=c("邻村的暴力", "上一年暴力","前两年暴力")) +
       labs(x = "预测概率的变化", y = "") + 
       theme(axis.text = element_text(size=14),
             text = element_text(size=11, family = 'KaiTi'),
             plot.title = element_text(hjust = 0.5),
             axis.title=element_text(size=14)) 
ggsave("figures/fdc.pdf", plot = fd_C,  width = 8, height = 10)



################# Figure 6(a) ###########################
##########################################################

sim_b1 <- sim_cluster(ModelResults = logit_nofix, n.sim = 1000, data = data, 
                      clusterid = "gid")

######################## Figure 6(a)########################
##########################################################
val1 =4; val2 = 10; intervals = 0.5
varname2_val = seq(val1, val2, by = intervals)
varname2 = "lngdppc"
df <- array(NA, c(length(varname2_val), 4))

for (i in 1:length(varname2_val)){
       X1 <- model.matrix(ModelResults)
       X1[, varname2] = varname2_val[i]
       fd = apply(apply(X1, 1, function (x) plogis(sim_b1 %*% x)), 1, mean)
       
       df[i, 1] <- varname2_val[i]
       df[i, 2] <- mean(fd)
       df[i, 3:4] <- quantile(fd, probs = c(.05,.95))
}

colnames(df) <- c("X", "mean", "lo", "hi")
df <- as.data.frame(df)

p2_b1 = ggplot(df, aes(x=X, y=mean)) +
       theme_gray() +
       geom_ribbon(aes(ymin = lo, ymax = hi), alpha=.2, color = NA) +
       geom_line(aes(y = mean, x = X), size = 0.5, alpha = 0.75) +
       scale_x_continuous(name = "人均GDP（对数）",
                          breaks = seq(val1, val2, by = 1)) +
       theme(axis.text = element_text(size=12),
             text = element_text(size=11, family = 'KaiTi'),
             plot.title = element_text(hjust = 0.5,size = 30, face = "bold"),
             axis.title=element_text(size=12)) + 
       ylab("") 
ggsave("figures/p2_gdppdc.pdf", plot = p2_b1,  width = 8, height = 6)


######################## Figure 6(b)########################
##########################################################
###nighttime light
##set simulation
ModelResults = logit_nofix 
val1 =0; val2 = 60; intervals = 5
varname2_val = seq(val1, val2, by = intervals)
varname2 = "nlights_max"
df <- array(NA, c(length(varname2_val), 4))


for (i in 1:length(varname2_val)){
       X1 <- model.matrix(ModelResults)
       X1[, varname2] = varname2_val[i]
       fd = apply(apply(X1, 1, function (x) plogis(sim_b1 %*% x)), 1, mean)
       
       df[i, 1] <- varname2_val[i]
       df[i, 2] <- mean(fd)
       df[i, 3:4] <- quantile(fd, probs = c(.05,.95))
}

colnames(df) <- c("X", "mean", "lo", "hi")
df <- as.data.frame(df)

p_b1 = ggplot(df, aes(x=X, y=mean)) +
       theme_bw() +
       geom_ribbon(aes(ymin = lo, ymax = hi), alpha=.2, color = NA) +
       geom_line(aes(y = mean, x = X), size = 0.5, alpha = 0.75) +
       scale_x_continuous(name = "夜间光亮值（单元网格层次）",
                          breaks = seq(val1, val2, by = 5)) +
       theme(axis.text = element_text(size=12),
             text = element_text(size=11, family = 'KaiTi'),
             plot.title = element_text(hjust = 0.5,size = 30, face = "bold"),
             axis.title=element_text(size=12)) + 
       ylab("") 
ggsave("figures/p_night.pdf", plot = p_b1,  width = 8, height = 6)



######################## Figure 7(a)########################
##########################################################
##non spatial-temporal model
logit0 <- glm(violence ~ excluded + nlights_max + prec_gpcp + mountains_mean + 
                     neighboringcivilwar + neighboringInterStateWar + polity2 + bdist2 + capdist + diamsec_s + 
                     petroleum_s + irregular_dummy + lntpop + gdppcrate + lngdppc + 
                     milexpr,
              data = data, family = "binomial")

ModelResults = list(logit_nofix,logit0);linetypes = c("solid", "dotted"); interval=0.2

require(plotROC)
require(pROC)
require(dplyr)
require(ggplot2)
library(purrr)
library(tibble)
library(magrittr)
library(extrafont)
library(showtext)
library(ROCR)
library(caTools)
showtext_auto(enable = TRUE)
#font_add('SimSun', 'SimSun.ttf')
font_add('KaiTi', 'KaiTi.ttf')
pred_dv =lapply(ModelResults, function(x)
       FUN = predict(x, type = "response"))

Y = lapply(ModelResults, function(x) FUN = x$y)

roc = Map(function(x, y) roc(x,y), Y, pred_dv)

roc_df  = lapply(roc, function(x)
       FUN= data.frame(plotx = x$specificities,
                       ploty = rev(x$sensitivities),
                       name = paste("AUC =",
                                    sprintf("%.3f",x$auc)))) %>%
       map_df(., rbind)
p = ggplot(roc_df, aes(x = plotx, y = ploty, color = name, linetype = name)) +
       geom_line() +
       scale_colour_discrete("") +
       scale_linetype_manual(name = '',
                             values=linetypes) +
       geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), alpha = 0.5) +
       #geom_step(aes(linetype = name)) +
       scale_x_continuous(name = "假阳性(False Positive Rate)", expand = c(0.001,0.001)) +
       scale_y_continuous(name = "真阳性(True positive rate)", expand = c(0.001,0.001)) +
       theme_bw() +
       theme(legend.position=c(.8, 0.1),
             text = element_text(size=16, family = 'KaiTi'),
             axis.title.y = element_text(margin = margin(1,1,1,1)),
             axis.text = element_text(size=16),
             axis.title=element_text(size=16)) 

p1 =   p + annotate("segment", x = 0.2, xend = 0.2, y = .45, yend = 0.55,
                    colour="red", arrow=arrow(length=unit(0.1, "inches"))) + 
       annotate("text", x = 0.2, y = 0.45, parse = F, 
                label = "Logit模型",size=6, family = "KaiTi") +
       annotate("segment", x = 0.2, xend = 0.2, y = 0.85, yend = 0.9,
                colour="blue", arrow=arrow(length=unit(0.1, "inches"))) + 
       annotate("text", x = 0.2, y = 0.85, parse = F, 
                label = "时空模型",size=6,family = "KaiTi") 
ggsave("figures/roc.png", plot = p1, width = 8, height = 8)



######################## Figure 7(b)########################
##########################################################

rocdf <- function(pred, obs, data=NULL, model = NULL) {
       # plot_type is "roc" or "pr"
       if (!is.null(data)) {
              pred <- eval(substitute(pred), envir=data)
              obs  <- eval(substitute(obs), envir=data)
       }
       
       rocr_df <- prediction(pred, obs)
       rocr_pr <- performance(rocr_df, "prec", "rec")
       xy <- data.frame(rocr_pr@x.values[[1]], rocr_pr@y.values[[1]])
       xy[1, 2] <- 0
       colnames(xy) <- c("rec", "prec")
       xy$model <- model
       return(xy)
}

## for non-spatial model
pred_logit0 <- predict(logit0, type = "response")
obs <- logit0$y
pred_logitnofix <- predict(logit_nofix, type = "response")
obs_spatial <-logit_nofix$y

prc_df1 <- rocdf(pred_logit0, obs, data = data, model = "Logit模型")
prc_df2 <- rocdf(pred_logitnofix, obs_spatial, data = data, model = "时空模型")
prcdf <- bind_rows(prc_df1, prc_df2)


p2 <- ggplot(data=prcdf, aes(x=rec, y=prec, color = model, linetype = model)) + 
       geom_line(show.legend=TRUE, alpha=0.7) + 
       geom_segment(aes(x = 0, y = 1, xend = 1, yend = 0), alpha = 0.5) +
       scale_x_continuous(name = "查全率(Recall)", expand = c(0.001,0.001)) +
       scale_y_continuous(name = "查准率(Precision)", expand = c(0.001,0.001)) +
       scale_color_discrete() +
       theme_bw() + 
       theme(legend.position=c(.8, 0.1),
             text = element_text(size=16, family = 'KaiTi'),
             axis.title.y = element_text(margin = margin(1,1,1,1)),
             axis.text = element_text(size=16),
             axis.title=element_text(size=16)) 

ggsave("figures/prc.png", plot = p2,  width = 8, height = 8)




######################## Figure 8 ########################
##########################################################

####robustnes check
data <- data %>%
       dplyr::ungroup() %>% 
       dplyr::mutate(tpop = exp(lntpop)) %>% 
       dplyr::group_by(gwno, year) %>% 
       dplyr::mutate(gdptotal_light = sum(nlights_max)) %>% 
       dplyr::mutate(gdppc_light = gdptotal_light / tpop) %>% 
       dplyr::mutate(lngdppc_light = log(gdppc_light + 1)*1000) %>% 
       dplyr::ungroup() 

##simulation on nighttime light
logit_robust1 <- glm(violence ~ excluded + nlights_max + prec_gpcp + mountains_mean + 
                            neighboringcivilwar + neighboringInterStateWar + polity2 + bdist2 + capdist + diamsec_s + 
                            petroleum_s + irregular_dummy + lntpop + gdppcrate + lngdppc_light + 
                            milexpr + violence_t1 + violence_t2 + violence_s1, data = data, family = "binomial")
summary(logit_robust1)

######################## Figure 8(a)########################
##########################################################
coef_logit <- coef_clusterplot(ModelResults = logit_robust1,
                               varnames = c('excluded', 'nlights_max','prec_gpcp',
                                            'mountains_mean','neighboringcivilwar',
                                            'neighboringInterStateWar',
                                            'polity2', 'bdist2', 'capdist','diamsec_s', 
                                            'petroleum_s', 'irregular_dummy', 'lntpop', 
                                            'gdppcrate','lngdppc_light', 
                                            'milexpr','violence_t1','violence_t2',
                                            'violence_s1'),
                               data = data, 
                               clusterid = "gid") + theme_bw() + 
       scale_x_discrete("", labels=c("到最近邻国距离",
                                     "距首都距离","蕴藏冲积矿钻石","被排斥族群",
                                     "人均GDP增长率", "非正常领导更迭","人均夜光值(对数)",
                                     "总人口(对数)","军费开支","山地面积",
                                     "邻国处于内战","邻国处于国际冲突",
                                     "夜间灯光(最大值)", "蕴藏石油",
                                     "政体分值", "总降水量",
                                     "邻村的暴力", "上一年暴力","前两年暴力")) +
       theme(axis.text = element_text(size=14),
             text = element_text(size=11, family = 'KaiTi'),
             plot.title = element_text(hjust = 0.5,size = 30, face = "bold"),
             axis.title=element_text(size=14))
ggsave("figures/logit_robust1.pdf", plot = coef_logit,  width = 8, height = 10)

######################## Figure 8(a)########################
##########################################################
##use stata to run multielevel model
library(haven)
#write_dta(data, path = "./Data/data.dta")
## stata code
#cd "/Users/chongchen/Documents/DukeResearch/PredictionProject"

# use "./Data/data.dta", clear
#**Multilevel mixed-effects logistic regression

#melogit violence  excluded  nlights_max  prec_gpcp  mountains_mean ///
#       neighboringcivilwar  neighboringInterStateWar  polity2  bdist2  capdist  diamsec_s /// 
#       petroleum_s  irregular_dummy  lntpop  gdppcrate  lngdppc ///
#       milexpr  violence_t1  violence_t2  violence_s1 || gwno:  || gid:, nonrtolerance 
#regsave using robust2, ci level(90)		

library(readstata13)
robust2 <- read.dta13("robust2.dta")


robust2 <- robust2 %>%
       dplyr::mutate(color_sig = ifelse(ci_lower < 0 & ci_upper > 0, "#ef8a62", "#2c7bb6"))
robust2<- robust2[1:19,]

robust2_fig <- ggplot(robust2, aes(x=var, y=coef, ymin=ci_lower, ymax=ci_upper)) +
       geom_pointrange(size=1, shape=16, colour=robust2$color_sig) +
       coord_flip() +
       geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
       xlab('') + ylab('') +  
       theme(axis.text = element_text(size=14),
             text = element_text(size=11, family = 'KaiTi'),
             plot.title = element_text(hjust = 0.5,size = 30, face = "bold"),
             axis.title=element_text(size=14)) + theme_bw() +
       scale_x_discrete(labels=c("到最近邻国距离",
                                 "距首都距离","蕴藏冲积矿钻石","被排斥族群",
                                 "人均GDP增长率", "非正常领导更迭","人均GDP(对数)",
                                 "总人口(对数)","军费开支","山地面积",
                                 "邻国处于内战","邻国处于国际冲突",
                                 "夜间灯光(最大值)", "蕴藏石油",
                                 "政体分值", "总降水量",
                                 "邻村的暴力", "上一年暴力","前两年暴力")) 
ggsave("figures/robust2_fig.pdf", plot =robust2_fig, width = 8, height = 10)



